/* START 4_splitLongPanel.do */

/* prerequisite packages: fillmissing, labutil2, gtools, parallel,
ivreghdfe, ranktest */


set more off
cap clear

/* PATHS */
qui do "code/config.do"
qui do "./code/99_accessoryFunctions.do"

/* Initialize 12 threads */
parallel initialize 12

/* load long panel */
gzuse "data/panel.dta.gz", clear

/**************************************************/
/* SUBSET TO A SYMMETRIC WINDOW AROUND 12/27/2015 */
/**************************************************/

/* Keep to after 06/2017. Note that DTL3 opens 10/21/2017 */

/* keep if START <= tc("31dec2016 23:59:59") */
keep if START >= tc("31may2017 23:59:59")
/* keep if TKT_TYP_CD == 38 */

local sample 0
/* Obtain list of IDs and sample 5% */
if `sample' == 1 {
	frame put CRD_NUM TKT_TYP_CD, into(id)
	frame id {
		gcollapse (count) TKT_TYP_CD, by(CRD_NUM)
		/* gstats sum TKT_TYP_CD, det */
		ren TKT_TYP_CD trips
		keep if trips >= 15  /* modal # trips in this sample is 13 */
		sample 5
		hashsort CRD_NUM
	}

	frlink m:1 CRD_NUM, frame(id)
	keep if id != .
	drop id
	frame drop id
	save "data/demandSample.dta", replace
	}
else {
	gegen trips = count(TKT_TYP_CD), by(CRD_NUM)
	keep if trips >= 10 /* modal # trips in this post-June 2017 sample is 12 */
	}


/**************************************/
/* COMPUTE MODAL ORIGINS/DESTINATIONS */
/**************************************/

label copy oa1 oa
label drop oa1
label values oa da oa

by CRD_NUM: gen int trip_number = _n
hashsort CRD_NUM trip_number
order CRD_NUM trip_number START
drop END

/* generate modal origins/destinations IN THE MORNING/EVENING.
cutoffs: (5.00am, 11.59am) and (4.00pm, 11.59pm) */
frame put CRD_NUM trip_number START oa da op dp, into(modal_trips)

frame modal_trips {
	cap gen byte hstart = hh(START)
	gen byte am = hstart >= 5 & hstart <= 11
	gen byte pm = hstart >= 16 & hstart <= 23
	tempfile tmp
	save `tmp', replace
	}

local i = 1
timer clear
local subzones oa da
local planning_areas op dp
foreach timeOfDay in am pm {
	foreach x in /* `subzones' */ `planning_areas' {
		disp "Now generating `x'_mode_`timeOfDay'"
		frame modal_trips {
			cap lab drop `x'_mode_`timeOfDay'
			/* drop if `timeOfDay' == 0 */
			timer on `i'
			hashsort CRD_NUM `timeOfDay' `x'
			order CRD_NUM `timeOfDay' `x'
			parallel, by(CRD_NUM `timeOfDay'): ///
			  egen int `x'_mode_`timeOfDay' = mode(`x') ///
			  if `timeOfDay' == 1, by(CRD_NUM `timeOfDay') minmode missing
			parallel, by(CRD_NUM): fillmissing `x'_mode_`timeOfDay', by(CRD_NUM)
			timer off `i'
			}
		local i = `i' + 1
		}
	}
frame modal_trips {
	foreach timeOfDay in am pm {
		/* foreach x in oa da { */
			/* cap labdeval `x'_mode_`timeOfDay', copy(oa) */
			/* } */
		foreach x in op dp {
			cap labdeval `x'_mode_`timeOfDay', copy(op)
			}
		}
	}


frame modal_trips {
	hashsort CRD_NUM trip_number
	compress
	save `tmp', replace
	}

frame drop modal_trips
fmerge 1:1 CRD_NUM trip_number ///
  using `tmp', keepusing(*_mode_*)
drop _merge
compress
save "data/demand-subzones-after-june-2017.dta", replace

/**************************/
/* GENERATE HOME AND WORK */
/**************************/

/* specify home planning area. If subzones match (or home planning
area nonempty), then specify morning departure subzone as home */

use "data/demand-subzones-after-june-2017.dta", clear

foreach name in pHome aHome pWork aWork {
	cap label drop `name'
	}
gen pHome = op_mode_am if op_mode_am == dp_mode_pm
lab val pHome op
gen aHome = oa_mode_am if oa_mode_am == da_mode_pm | pHome != .
lab val aHome oa
replace pHome = op_mode_am if aHome != .


/* similarly specify work planning area */

gen pWork = dp_mode_am if dp_mode_am == op_mode_pm
lab val pWork op
gen aWork = da_mode_am if da_mode_am == oa_mode_pm | pWork != .
lab val aWork oa
replace pWork = dp_mode_am if aWork != .

/*******************************/
/* GENERATE COMMUTING MATRICES */
/*******************************/

/* generate week info for subsetting */
gen dayOfWeek = dow(dofc(START))
gen date = dofc(START)
format date %td
gen week = wofd(date)
format week %tw
gen qr = quarter(date)
gen yr = year(date)
drop date

frame put CRD_NUM TKT_TYP_CD op dp pHome pWork dayOfWeek week qr yr ///
  if pWork != . & pHome != ., into(matP)
frame put CRD_NUM TKT_TYP_CD oa da t aHome aWork dayOfWeek week qr yr ///
  if aWork != . & aHome != ., into(matA)

frame matP {
	foreach workfareFlag in 0 1 {
		/* generate homes */
		preserve
		if `workfareFlag' == 1 {
			keep if TKT_TYP_CD == 38
			}
		gcollapse (nunique) CRD_NUM, by(pHome)
		decode pHome, gen(home_pl)
		order home_pl
		drop pHome
		ren CRD_NUM pop
		if `workfareFlag' == 1 {
			save "data/matHomeDemandWorkfare.dta", replace
			}
		else {
			save "data/matHomeDemand.dta", replace
			}
		restore

	/* generate conditional commuting matrices */
		preserve
		if `workfareFlag' == 1 {
			keep if TKT_TYP_CD == 38
			}
		gcollapse (nunique) CRD_NUM, by (pHome pWork)
		decode pWork, gen(work_pl)
		drop pWork
		greshape wide CRD_NUM, by(pHome) keys(work_pl)
		decode pHome, gen(home_pl)
		drop pHome
		order home_pl
		ren CRD_NUM* *
		if `workfareFlag' == 1 {
			save "data/matCommuteDemandWorkfare.dta", replace
			}
		else {
			save "data/matCommuteDemand.dta", replace
			}
		restore
		}
	/* GENERATE LEISURE (weekend) TRIP MATRICES FOR PLANNING AREAS */
   
   keep if dayOfWeek == 0 | dayOfWeek == 6 /* | (am == 0 & pm == 0) */
   keep if pHome != dp | op == dp
   gcollapse (count) dayOfWeek (mean) TKT_TYP_CD, by(CRD_NUM pHome dp)
	ren dayOfWeek leisureTrips

	/* Generate modal leisure destination */
	hashsort CRD_NUM dp leisureTrips
	gegen maxTrips = max(leisureTrips), by(CRD_NUM)
	gen argmax = 1 if leisureTrips == maxTrips
	by CRD_NUM: gegen numArgmax = count(argmax)
	
	keep if argmax == 1
	by CRD_NUM: sample 1, count /* if repeated max, draw at random */

	foreach workfareFlag in 0 1 {
		preserve
		if `workfareFlag' == 1 {
			keep if TKT_TYP_CD == 38
			}
		gcollapse (nunique) CRD_NUM, by (pHome dp)
		decode dp, gen(dest_pl)
		replace dest_pl = "missing" if dest_pl == ""
		drop dp
		greshape wide CRD_NUM, by(pHome) keys(dest_pl)
		decode pHome, gen(home_pl)
		drop pHome
		order home_pl
		ren CRD_NUM* *
		if `workfareFlag' == 1 {
			save "data/matLeisureDemandWorkfare.dta", replace
			}
		else {
			save "data/matLeisureDemand.dta", replace
			}
		restore
		}

	}

frame matA {
	foreach workfareFlag in 0 1 {
		/* generate homes */
		preserve
		if `workfareFlag' == 1 {
			keep if TKT_TYP_CD == 38
			}
		gcollapse (nunique) CRD_NUM, by(aHome)
		decode aHome, gen(home_area)
		order home_area
		drop aHome
		ren CRD_NUM pop
		if `workfareFlag' == 1 {
			save "data/matAHomeDemandWorkfare.dta", replace
			}
		else {
			save "data/matAHomeDemand.dta", replace
			}
		restore

	/* generate conditional commuting matrices */
		preserve
		if `workfareFlag' == 1 {
			keep if TKT_TYP_CD == 38
			}
		ren CRD_NUM c
		gcollapse (nunique) c, by (aHome aWork)
		decode aWork, gen(work_area)
		drop aWork
		greshape wide c, by(aHome) keys(work_area)
		decode aHome, gen(home_area)
		drop aHome
		order home_area
		ren c* *
		if `workfareFlag' == 1 {
			save "data/matACommuteDemandWorkfare.dta", replace
			}
		else {
			save "data/matACommuteDemand.dta", replace
			}
		restore
		}
	/* GENERATE LEISURE (weekend) TRIP MATRICES FOR PLANNING AREAS */
   
   keep if dayOfWeek == 0 | dayOfWeek == 6 /* | (am == 0 & pm == 0) */
   keep if aHome != da | oa == da

	/* compute static quantities */
	frame put CRD_NUM TKT_TYP_CD dayOfWeek aHome da, into(matAAgg)
	}

/* COMPUTE AGGREGATE STATS */
frame matAAgg {
   gcollapse (count) dayOfWeek (mean) TKT_TYP_CD, by(CRD_NUM aHome da)
	ren dayOfWeek leisureTrips

	/* Generate modal leisure destination for now */
	hashsort CRD_NUM da leisureTrips
	gegen maxTrips = max(leisureTrips), by(CRD_NUM)
	gen argmax = 1 if leisureTrips == maxTrips
	by CRD_NUM: gegen numArgmax = count(argmax)
	
	keep if argmax == 1
	by CRD_NUM: sample 1, count /* if repeated max, draw at random */

	foreach workfareFlag in 0 1 {
		preserve
		if `workfareFlag' == 1 {
			keep if TKT_TYP_CD == 38
			}
		gcollapse (nunique) CRD_NUM, by (aHome da)
		decode da, gen(dest_area)
		replace dest_area = "missing" if dest_area == ""
		drop da
		ren CRD_NUM c
		greshape wide c, by(aHome) keys(dest_area)
		decode aHome, gen(home_area)
		drop aHome
		order home_area
		ren c* *
		if `workfareFlag' == 1 {
			save "data/matALeisureDemandWorkfare.dta", replace
			}
		else {
			save "data/matALeisureDemand.dta", replace
			}
		restore
		}
	}

frame matA {
	/* first compute number of trips per week/quarter */
	preserve
	gcollapse (count) dayOfWeek (mean) TKT_TYP_CD, by(aHome da yr qr)
	ren dayOfWeek leisureTrips
	drop TKT_TYP_CD
	drop if da == .
	
	/* fill in missing observations */
	gen int Time = (yr-2017) * 4 + (qr - 1)
	gen long Id = (aHome - 1) * 305 + (da - 1)
	tsset Id Time
	tsfill, full
	bys Id: fillmissing aHome da, with(any)

	replace yr = 2017 if Time <= 3 & yr == .
	replace yr = 2018 if yr == .
	replace qr = (Time + 1) if yr == 2017 & qr == .
	replace qr = (Time - 3) if yr == 2018 & qr == .
	replace leisureTrips = 0 if leisureTrips == .
	
	compress
	save "data/matALeisureByQuarterAfterJune2017.dta", replace
	restore

	/* next compute average travel time per year-quarter */
	preserve
	gcollapse (mean) t, by(aHome da yr qr)
	drop if aHome == . | da == .
	gen int Time = (yr-2017) * 4 + (qr - 1)
	gen long Id = (aHome - 1) * 305 + (da - 1)
	tsset Id Time
	tsfill, full
	bys Id: fillmissing aHome da, with(any)

	replace yr = 2017 if Time <= 3 & yr == .
	replace yr = 2018 if yr == .
	replace qr = (Time + 1) if yr == 2017 & qr == .
	replace qr = (Time - 3) if yr == 2018 & qr == .

	bys aHome da: gegen t_mean = mean(t)
	replace t = t_mean if t == .
	drop t_mean
	compress
	save "data/travelTimeLeisureByQuarterAfterJune2017.dta", replace
	}
	

/* Adds week labels to the 2015-2016 data and generates
commuting/leisure matrices thereof. */

gzuse "data/panel.dta.gz"

/* generate weekly dates */

gen date = dofc(START)
format date %td
gen week = wofd(date)
format week %tw
gen qr = qofd(date)
format qr %tq

gen after = 0
replace after = (date >= td(27dec2015))

/* generate commute/travel times, by planning area and subzone and BEFORE/AFTER */
preserve
gcollapse (mean) t, by(op dp after)
drop if op == . | dp == .
order (op dp after)
save "data/commuteTimesPl.dta", replace
restore

preserve
gcollapse (mean) t, by(oa da after)
drop if oa == . | da == .
order (oa da after)
save "data/commuteTimesArea.dta", replace
restore


/* generate commute/travel times, by planning area and subzone AND WEEK */

preserve
gcollapse (mean) t, by(op dp week)
drop if op == . | dp == .
/* greshape wide t, by(op) keys(dp) */
/* greshape long t, by(op) keys(dp) */
order (op dp week)
save "data/commuteTimesPlByWeek.dta", replace
restore


preserve
gcollapse (mean) t, by(oa da week yr)
drop if oa == . | da == .
order (oa da week)
save "data/commuteTimesAreaByWeek.dta", replace
restore

// for panel
preserve
gcollapse (mean) t, by(oa da qr)
drop if oa == . | da == .
/* greshape wide t, by(op) keys(dp) */
/* greshape long t, by(op) keys(dp) */
order (oa da qr)
save "data/commuteTimesAreaByWeekPanel.dta", replace
restore

/********************************/
/* KEEP ONLY BUSES              */
/********************************/

keep if JRNY_ORIG_ID_NUM >= 1000 & JRNY_DEST_ID_NUM >= 1000

// 2015-2016
preserve
gcollapse (mean) t, by(oa da week)
drop if oa == . | da == .
order (oa da week)
save "data/commuteTimesAreaByWeekBus.dta", replace
restore

preserve
gcollapse (mean) t, by(op dp week)
drop if op == . | dp == .
order (op dp week)
save "data/commuteTimesPlByWeekBus.dta", replace
restore

cap keep if JRNY_ORIG_ID_NUM >= 1000 & JRNY_DEST_ID_NUM >= 1000
// panel
preserve
gcollapse (mean) t, by(oa da qr)
drop if oa == . | da == .
order (oa da qr)
save "data/commuteTimesAreaByWeekBusPanel.dta", replace
restore

preserve
gcollapse (mean) t, by(op dp qr)
drop if op == . | dp == .
order (op dp qr)
save "${dropbox}/commuteTimesPlByWeekBusPanel.dta", replace
restore


/* generate home planning areas */

gen pHome = op_mode_am if op_mode_am == dp_mode_pm
labdeval pHome, copy(op) replace
gen aHome = oa_mode_am if oa_mode_am == da_mode_pm | pHome != .
labdeval aHome, copy(oa)

/* generate leisure (weekend) trips */
gen dayOfWeek = dow(dofc(START))
frame put id TKT_TYP_CD op dp pHome week dayOfWeek if pHome != ., into(matP)
frame put id TKT_TYP_CD oa da aHome week dayOfWeek if aHome != ., into(matA)

frame matP {
	keep if dayOfWeek == 0 | dayOfWeek == 6 /* | (am == 0 & pm == 0) */
   keep if pHome != dp | op == dp
   gcollapse (count) dayOfWeek, by(id TKT_TYP_CD pHome dp week)
	ren dayOfWeek leisureTrips
	gcollapse (sum) leisureTrips, by(pHome dp week)
	compress
	save "data/matLeisureByWeek.dta", replace
	}

frame matA {
	keep if dayOfWeek == 0 | dayOfWeek == 6 /* | (am == 0 & pm == 0) */
   keep if aHome != da | oa == da
   gcollapse (count) dayOfWeek, by(id TKT_TYP_CD aHome da week)
	ren dayOfWeek leisureTrips
	gcollapse (sum) leisureTrips, by(aHome da week)
	compress
	save "data/matLeisureAByWeek.dta", replace
	}
